In-class Exercise 5

Getting started

# Install (if missing) and attach every package used in this exercise.
# `sf` was listed twice; the duplicate is dropped, the load order is unchanged.
pacman::p_load(
  sf, tidyverse, funModeling, blorr, corrplot, ggpubr,
  spdep, GWmodel, tmap, skimr, caret, report
)

Data import

Save the data as RDS files so that other people can load them easily.

# Load the two pre-processed objects that were saved as RDS files.
Osun <- read_rds("rds/Osun.rds")
Osun_wp_sf <- read_rds("rds/Osun_wp_sf.rds")

# Frequency table of the `status` column.
freq(Osun_wp_sf, input = "status")
Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
of ggplot2 3.3.4.
ℹ The deprecated feature was likely used in the funModeling package.
  Please report the issue at <https://github.com/pablo14/funModeling/issues>.

  status frequency percentage cumulative_perc
1   TRUE      2642       55.5            55.5
2  FALSE      2118       44.5           100.0
# Switch tmap to interactive viewing mode for the map that follows.
tmap_mode("view")
tmap mode set to interactive viewing
# tmap_options(check.and.fix = TRUE)
# Draw the Osun polygons semi-transparently, overlay the water points
# coloured by `status`, and restrict the zoom levels of the viewer.
tm_shape(Osun) +
  tm_polygons(alpha = 0.4) +
  tm_shape(Osun_wp_sf) +
  tm_dots(col = "status", alpha = 0.6) +
  tm_view(set.zoom.limits = c(9, 12))

EDA

summary statistics with skimr

# Summarise every column of the water point data with skimr.
# The pipe is kept as-is: the summary header reports the input as "Piped data".
Osun_wp_sf %>%
  skim()
Warning: Couldn't find skimmers for class: sfc_POINT, sfc; No user-defined `sfl`
provided. Falling back to `character`.
Data summary
Name Piped data
Number of rows 4760
Number of columns 75
_______________________
Column type frequency:
character 47
logical 5
numeric 23
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
source 0 1.00 5 44 0 2 0
report_date 0 1.00 22 22 0 42 0
status_id 0 1.00 2 7 0 3 0
water_source_clean 0 1.00 8 22 0 3 0
water_source_category 0 1.00 4 6 0 2 0
water_tech_clean 24 0.99 9 23 0 3 0
water_tech_category 24 0.99 9 15 0 2 0
facility_type 0 1.00 8 8 0 1 0
clean_country_name 0 1.00 7 7 0 1 0
clean_adm1 0 1.00 3 5 0 5 0
clean_adm2 0 1.00 3 14 0 35 0
clean_adm3 4760 0.00 NA NA 0 0 0
clean_adm4 4760 0.00 NA NA 0 0 0
installer 4760 0.00 NA NA 0 0 0
management_clean 1573 0.67 5 37 0 7 0
status_clean 0 1.00 9 32 0 7 0
pay 0 1.00 2 39 0 7 0
fecal_coliform_presence 4760 0.00 NA NA 0 0 0
subjective_quality 0 1.00 18 20 0 4 0
activity_id 4757 0.00 36 36 0 3 0
scheme_id 4760 0.00 NA NA 0 0 0
wpdx_id 0 1.00 12 12 0 4760 0
notes 0 1.00 2 96 0 3502 0
orig_lnk 4757 0.00 84 84 0 1 0
photo_lnk 41 0.99 84 84 0 4719 0
country_id 0 1.00 2 2 0 1 0
data_lnk 0 1.00 79 96 0 2 0
water_point_history 0 1.00 142 834 0 4750 0
clean_country_id 0 1.00 3 3 0 1 0
country_name 0 1.00 7 7 0 1 0
water_source 0 1.00 8 30 0 4 0
water_tech 0 1.00 5 37 0 20 0
adm2 0 1.00 3 14 0 33 0
adm3 4760 0.00 NA NA 0 0 0
management 1573 0.67 5 47 0 7 0
adm1 0 1.00 4 5 0 4 0
New Georeferenced Column 0 1.00 16 35 0 4760 0
lat_lon_deg 0 1.00 13 32 0 4760 0
public_data_source 0 1.00 84 102 0 2 0
converted 0 1.00 53 53 0 1 0
created_timestamp 0 1.00 22 22 0 2 0
updated_timestamp 0 1.00 22 22 0 2 0
Geometry 0 1.00 33 37 0 4760 0
ADM2_EN 0 1.00 3 14 0 30 0
ADM2_PCODE 0 1.00 8 8 0 30 0
ADM1_EN 0 1.00 4 4 0 1 0
ADM1_PCODE 0 1.00 5 5 0 1 0

Variable type: logical

skim_variable n_missing complete_rate mean count
rehab_year 4760 0 NaN :
rehabilitator 4760 0 NaN :
is_urban 0 1 0.39 FAL: 2884, TRU: 1876
latest_record 0 1 1.00 TRU: 4760
status 0 1 0.56 TRU: 2642, FAL: 2118

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
row_id 0 1.00 68550.48 10216.94 49601.00 66874.75 68244.50 69562.25 471319.00 ▇▁▁▁▁
lat_deg 0 1.00 7.68 0.22 7.06 7.51 7.71 7.88 8.06 ▁▂▇▇▇
lon_deg 0 1.00 4.54 0.21 4.08 4.36 4.56 4.71 5.06 ▃▆▇▇▂
install_year 1144 0.76 2008.63 6.04 1917.00 2006.00 2010.00 2013.00 2015.00 ▁▁▁▁▇
fecal_coliform_value 4760 0.00 NaN NA NA NA NA NA NA
distance_to_primary_road 0 1.00 5021.53 5648.34 0.01 719.36 2972.78 7314.73 26909.86 ▇▂▁▁▁
distance_to_secondary_road 0 1.00 3750.47 3938.63 0.15 460.90 2554.25 5791.94 19559.48 ▇▃▁▁▁
distance_to_tertiary_road 0 1.00 1259.28 1680.04 0.02 121.25 521.77 1834.42 10966.27 ▇▂▁▁▁
distance_to_city 0 1.00 16663.99 10960.82 53.05 7930.75 15030.41 24255.75 47934.34 ▇▇▆▃▁
distance_to_town 0 1.00 16726.59 12452.65 30.00 6876.92 12204.53 27739.46 44020.64 ▇▅▃▃▂
rehab_priority 2654 0.44 489.33 1658.81 0.00 7.00 91.50 376.25 29697.00 ▇▁▁▁▁
water_point_population 4 1.00 513.58 1458.92 0.00 14.00 119.00 433.25 29697.00 ▇▁▁▁▁
local_population_1km 4 1.00 2727.16 4189.46 0.00 176.00 1032.00 3717.00 36118.00 ▇▁▁▁▁
crucialness_score 798 0.83 0.26 0.28 0.00 0.07 0.15 0.35 1.00 ▇▃▁▁▁
pressure_score 798 0.83 1.46 4.16 0.00 0.12 0.41 1.24 93.69 ▇▁▁▁▁
usage_capacity 0 1.00 560.74 338.46 300.00 300.00 300.00 1000.00 1000.00 ▇▁▁▁▅
days_since_report 0 1.00 2692.69 41.92 1483.00 2688.00 2693.00 2700.00 4645.00 ▁▇▁▁▁
staleness_score 0 1.00 42.80 0.58 23.13 42.70 42.79 42.86 62.66 ▁▁▇▁▁
location_id 0 1.00 235865.49 6657.60 23741.00 230638.75 236199.50 240061.25 267454.00 ▁▁▁▁▇
cluster_size 0 1.00 1.05 0.25 1.00 1.00 1.00 1.00 4.00 ▇▁▁▁▁
lat_deg_original 4760 0.00 NaN NA NA NA NA NA NA
lon_deg_original 4760 0.00 NaN NA NA NA NA NA NA
count 0 1.00 1.00 0.00 1.00 1.00 1.00 1.00 1.00 ▁▁▇▁▁

Having 20 percent of the data points missing is already not good for the analysis.

# Keep only the water points that have no missing value in any of the listed
# columns, then recode usage_capacity as a factor.
# if_all() replaces the superseded filter_at(vars(...), all_vars(...)) form
# and keeps exactly the same rows.
Osun_wp_sf_clean <- Osun_wp_sf %>%
  filter(
    if_all(
      c(
        status,
        distance_to_primary_road,
        distance_to_secondary_road,
        distance_to_tertiary_road,
        distance_to_city,
        distance_to_town,
        water_point_population,
        local_population_1km,
        usage_capacity,
        is_urban,
        water_source_clean
      ),
      ~ !is.na(.x)
    )
  ) %>%
  mutate(usage_capacity = as.factor(usage_capacity))

Correlation Analysis

# Extract a subset of columns from the cleaned sf object by position and drop
# the geometry so that a plain (non-spatial) table remains.
# NOTE(review): the positions below depend on the column order of
# Osun_wp_sf_clean; selecting by name would be more robust -- confirm which
# columns these indices map to before changing the input data.
Osun_wp <- Osun_wp_sf_clean %>%
  select(c(7, 35:39, 42:43, 46:47, 57)) %>%
  st_drop_geometry()

# Pairwise correlation matrix of columns 2 to 7 of the extracted table.
cluster_vars.cor <- cor(Osun_wp[, 2:7])

# Mixed correlation plot: ellipses in the lower triangle, coefficients in the
# upper triangle, black labels on the left and top.
corrplot.mixed(
  cluster_vars.cor,
  lower = "ellipse",
  upper = "number",
  tl.pos = "lt",
  diag = "l",
  tl.col = "black"
)

# Fit a binomial GLM with a logit link: `status` is regressed on the road,
# city and town distances, the urban flag, usage capacity, water source type
# and the two population variables.
model <- glm(
  status ~ distance_to_primary_road +
    distance_to_secondary_road +
    distance_to_tertiary_road +
    distance_to_city +
    distance_to_town +
    is_urban +
    usage_capacity +
    water_source_clean +
    water_point_population +
    local_population_1km,
  family = binomial(link = "logit"),
  data = Osun_wp_sf_clean
)

# Print the fitted model (call, coefficients and deviance).
model

Call:  glm(formula = status ~ distance_to_primary_road + distance_to_secondary_road + 
    distance_to_tertiary_road + distance_to_city + distance_to_town + 
    is_urban + usage_capacity + water_source_clean + water_point_population + 
    local_population_1km, family = binomial(link = "logit"), 
    data = Osun_wp_sf_clean)

Coefficients:
                             (Intercept)  
                               3.887e-01  
                distance_to_primary_road  
                              -4.642e-06  
              distance_to_secondary_road  
                              -5.143e-06  
               distance_to_tertiary_road  
                               9.683e-05  
                        distance_to_city  
                              -1.686e-05  
                        distance_to_town  
                              -1.480e-05  
                            is_urbanTRUE  
                              -2.971e-01  
                      usage_capacity1000  
                              -6.230e-01  
water_source_cleanProtected Shallow Well  
                               5.040e-01  
      water_source_cleanProtected Spring  
                               1.288e+00  
                  water_point_population  
                              -5.097e-04  
                    local_population_1km  
                               3.451e-04  

Degrees of Freedom: 4755 Total (i.e. Null);  4744 Residual
Null Deviance:      6534 
Residual Deviance: 5688     AIC: 5712

Using blr_regress() for a better report.

# Tabulated report of the fitted logistic model: model overview, response
# summary, maximum likelihood estimates and association statistics.
blr_regress(model)
                             Model Overview                              
------------------------------------------------------------------------
Data Set    Resp Var    Obs.    Df. Model    Df. Residual    Convergence 
------------------------------------------------------------------------
  data       status     4756      4755           4744           TRUE     
------------------------------------------------------------------------

                    Response Summary                     
--------------------------------------------------------
Outcome        Frequency        Outcome        Frequency 
--------------------------------------------------------
   0             2114              1             2642    
--------------------------------------------------------

                                 Maximum Likelihood Estimates                                   
-----------------------------------------------------------------------------------------------
               Parameter                    DF    Estimate    Std. Error    z value     Pr(>|z|) 
-----------------------------------------------------------------------------------------------
              (Intercept)                   1      0.3887        0.1124      3.4588       5e-04 
        distance_to_primary_road            1      0.0000        0.0000     -0.7153      0.4744 
       distance_to_secondary_road           1      0.0000        0.0000     -0.5530      0.5802 
       distance_to_tertiary_road            1      1e-04         0.0000      4.6708      0.0000 
            distance_to_city                1      0.0000        0.0000     -4.7574      0.0000 
            distance_to_town                1      0.0000        0.0000     -4.9170      0.0000 
              is_urbanTRUE                  1     -0.2971        0.0819     -3.6294       3e-04 
           usage_capacity1000               1     -0.6230        0.0697     -8.9366      0.0000 
water_source_cleanProtected Shallow Well    1      0.5040        0.0857      5.8783      0.0000 
   water_source_cleanProtected Spring       1      1.2882        0.4388      2.9359      0.0033 
         water_point_population             1      -5e-04        0.0000    -11.3686      0.0000 
          local_population_1km              1      3e-04         0.0000     19.2953      0.0000 
-----------------------------------------------------------------------------------------------

 Association of Predicted Probabilities and Observed Responses  
---------------------------------------------------------------
% Concordant          0.7347          Somers' D        0.4693   
% Discordant          0.2653          Gamma            0.4693   
% Tied                0.0000          Tau-a            0.2318   
Pairs                5585188          c                0.7347   
---------------------------------------------------------------
# Produce a narrative text description of the fitted model.
# NOTE(review): the generated text is repeated once per predictor, which looks
# unintended -- possibly an issue in the installed version of `report`; confirm.
report(model)
We fitted a logistic model (estimated using ML) to predict status with
distance_to_primary_road (formula: status ~ distance_to_primary_road +
distance_to_secondary_road + distance_to_tertiary_road + distance_to_city +
distance_to_town + is_urban + usage_capacity + water_source_clean +
water_point_population + local_population_1km). The model's explanatory power
is moderate (Tjur's R2 = 0.16). The model's intercept, corresponding to
distance_to_primary_road = 0, is at 0.39 (95% CI [0.17, 0.61], p < .001).
Within this model:

  - The effect of distance to primary road is statistically non-significant and
negative (beta = -4.64e-06, 95% CI [-1.74e-05, 8.07e-06], p = 0.474; Std. beta
= -0.03, 95% CI [-0.10, 0.05])
  - The effect of distance to secondary road is statistically non-significant and
negative (beta = -5.14e-06, 95% CI [-2.34e-05, 1.31e-05], p = 0.580; Std. beta
= -0.02, 95% CI [-0.09, 0.05])
  - The effect of distance to tertiary road is statistically significant and
positive (beta = 9.68e-05, 95% CI [5.64e-05, 1.38e-04], p < .001; Std. beta =
0.16, 95% CI [0.09, 0.23])
  - The effect of distance to city is statistically significant and negative
(beta = -1.69e-05, 95% CI [-2.38e-05, -9.92e-06], p < .001; Std. beta = -0.18,
95% CI [-0.26, -0.11])
  - The effect of distance to town is statistically significant and negative
(beta = -1.48e-05, 95% CI [-2.07e-05, -8.91e-06], p < .001; Std. beta = -0.18,
95% CI [-0.26, -0.11])
  - The effect of is urbanTRUE is statistically significant and negative (beta =
-0.30, 95% CI [-0.46, -0.14], p < .001; Std. beta = -0.30, 95% CI [-0.46,
-0.14])
  - The effect of usage capacity [1000] is statistically significant and negative
(beta = -0.62, 95% CI [-0.76, -0.49], p < .001; Std. beta = -0.62, 95% CI
[-0.76, -0.49])
  - The effect of water source clean [Protected Shallow Well] is statistically
significant and positive (beta = 0.50, 95% CI [0.34, 0.67], p < .001; Std. beta
= 0.50, 95% CI [0.34, 0.67])
  - The effect of water source clean [Protected Spring] is statistically
significant and positive (beta = 1.29, 95% CI [0.48, 2.23], p = 0.003; Std.
beta = 1.29, 95% CI [0.48, 2.23])
  - The effect of water point population is statistically significant and
negative (beta = -5.10e-04, 95% CI [-6.01e-04, -4.26e-04], p < .001; Std. beta
= -0.74, 95% CI [-0.88, -0.62])
  - The effect of local population 1km is statistically significant and positive
(beta = 3.45e-04, 95% CI [3.11e-04, 3.81e-04], p < .001; Std. beta = 1.45, 95%
CI [1.30, 1.60])

Standardized parameters were obtained by fitting the model on a standardized
version of the dataset. 95% Confidence Intervals (CIs) and p-values were
computed using a Wald z-distribution approximation., We fitted a logistic model
(estimated using ML) to predict status with distance_to_secondary_road
(formula: status ~ distance_to_primary_road + distance_to_secondary_road +
distance_to_tertiary_road + distance_to_city + distance_to_town + is_urban +
usage_capacity + water_source_clean + water_point_population +
local_population_1km). The model's explanatory power is moderate (Tjur's R2 =
0.16). The model's intercept, corresponding to distance_to_secondary_road = 0,
is at 0.39 (95% CI [0.17, 0.61], p < .001). Within this model:

  - The effect of distance to primary road is statistically non-significant and
negative (beta = -4.64e-06, 95% CI [-1.74e-05, 8.07e-06], p = 0.474; Std. beta
= -0.03, 95% CI [-0.10, 0.05])
  - The effect of distance to secondary road is statistically non-significant and
negative (beta = -5.14e-06, 95% CI [-2.34e-05, 1.31e-05], p = 0.580; Std. beta
= -0.02, 95% CI [-0.09, 0.05])
  - The effect of distance to tertiary road is statistically significant and
positive (beta = 9.68e-05, 95% CI [5.64e-05, 1.38e-04], p < .001; Std. beta =
0.16, 95% CI [0.09, 0.23])
  - The effect of distance to city is statistically significant and negative
(beta = -1.69e-05, 95% CI [-2.38e-05, -9.92e-06], p < .001; Std. beta = -0.18,
95% CI [-0.26, -0.11])
  - The effect of distance to town is statistically significant and negative
(beta = -1.48e-05, 95% CI [-2.07e-05, -8.91e-06], p < .001; Std. beta = -0.18,
95% CI [-0.26, -0.11])
  - The effect of is urbanTRUE is statistically significant and negative (beta =
-0.30, 95% CI [-0.46, -0.14], p < .001; Std. beta = -0.30, 95% CI [-0.46,
-0.14])
  - The effect of usage capacity [1000] is statistically significant and negative
(beta = -0.62, 95% CI [-0.76, -0.49], p < .001; Std. beta = -0.62, 95% CI
[-0.76, -0.49])
  - The effect of water source clean [Protected Shallow Well] is statistically
significant and positive (beta = 0.50, 95% CI [0.34, 0.67], p < .001; Std. beta
= 0.50, 95% CI [0.34, 0.67])
  - The effect of water source clean [Protected Spring] is statistically
significant and positive (beta = 1.29, 95% CI [0.48, 2.23], p = 0.003; Std.
beta = 1.29, 95% CI [0.48, 2.23])
  - The effect of water point population is statistically significant and
negative (beta = -5.10e-04, 95% CI [-6.01e-04, -4.26e-04], p < .001; Std. beta
= -0.74, 95% CI [-0.88, -0.62])
  - The effect of local population 1km is statistically significant and positive
(beta = 3.45e-04, 95% CI [3.11e-04, 3.81e-04], p < .001; Std. beta = 1.45, 95%
CI [1.30, 1.60])

Standardized parameters were obtained by fitting the model on a standardized
version of the dataset. 95% Confidence Intervals (CIs) and p-values were
computed using a Wald z-distribution approximation., We fitted a logistic model
(estimated using ML) to predict status with distance_to_tertiary_road (formula:
status ~ distance_to_primary_road + distance_to_secondary_road +
distance_to_tertiary_road + distance_to_city + distance_to_town + is_urban +
usage_capacity + water_source_clean + water_point_population +
local_population_1km). The model's explanatory power is moderate (Tjur's R2 =
0.16). The model's intercept, corresponding to distance_to_tertiary_road = 0,
is at 0.39 (95% CI [0.17, 0.61], p < .001). Within this model:

  - The effect of distance to primary road is statistically non-significant and
negative (beta = -4.64e-06, 95% CI [-1.74e-05, 8.07e-06], p = 0.474; Std. beta
= -0.03, 95% CI [-0.10, 0.05])
  - The effect of distance to secondary road is statistically non-significant and
negative (beta = -5.14e-06, 95% CI [-2.34e-05, 1.31e-05], p = 0.580; Std. beta
= -0.02, 95% CI [-0.09, 0.05])
  - The effect of distance to tertiary road is statistically significant and
positive (beta = 9.68e-05, 95% CI [5.64e-05, 1.38e-04], p < .001; Std. beta =
0.16, 95% CI [0.09, 0.23])
  - The effect of distance to city is statistically significant and negative
(beta = -1.69e-05, 95% CI [-2.38e-05, -9.92e-06], p < .001; Std. beta = -0.18,
95% CI [-0.26, -0.11])
  - The effect of distance to town is statistically significant and negative
(beta = -1.48e-05, 95% CI [-2.07e-05, -8.91e-06], p < .001; Std. beta = -0.18,
95% CI [-0.26, -0.11])
  - The effect of is urbanTRUE is statistically significant and negative (beta =
-0.30, 95% CI [-0.46, -0.14], p < .001; Std. beta = -0.30, 95% CI [-0.46,
-0.14])
  - The effect of usage capacity [1000] is statistically significant and negative
(beta = -0.62, 95% CI [-0.76, -0.49], p < .001; Std. beta = -0.62, 95% CI
[-0.76, -0.49])
  - The effect of water source clean [Protected Shallow Well] is statistically
significant and positive (beta = 0.50, 95% CI [0.34, 0.67], p < .001; Std. beta
= 0.50, 95% CI [0.34, 0.67])
  - The effect of water source clean [Protected Spring] is statistically
significant and positive (beta = 1.29, 95% CI [0.48, 2.23], p = 0.003; Std.
beta = 1.29, 95% CI [0.48, 2.23])
  - The effect of water point population is statistically significant and
negative (beta = -5.10e-04, 95% CI [-6.01e-04, -4.26e-04], p < .001; Std. beta
= -0.74, 95% CI [-0.88, -0.62])
  - The effect of local population 1km is statistically significant and positive
(beta = 3.45e-04, 95% CI [3.11e-04, 3.81e-04], p < .001; Std. beta = 1.45, 95%
CI [1.30, 1.60])

Standardized parameters were obtained by fitting the model on a standardized
version of the dataset. 95% Confidence Intervals (CIs) and p-values were
computed using a Wald z-distribution approximation., We fitted a logistic model
(estimated using ML) to predict status with distance_to_city (formula: status ~
distance_to_primary_road + distance_to_secondary_road +
distance_to_tertiary_road + distance_to_city + distance_to_town + is_urban +
usage_capacity + water_source_clean + water_point_population +
local_population_1km). The model's explanatory power is moderate (Tjur's R2 =
0.16). The model's intercept, corresponding to distance_to_city = 0, is at 0.39
(95% CI [0.17, 0.61], p < .001). Within this model:

  - The effect of distance to primary road is statistically non-significant and
negative (beta = -4.64e-06, 95% CI [-1.74e-05, 8.07e-06], p = 0.474; Std. beta
= -0.03, 95% CI [-0.10, 0.05])
  - The effect of distance to secondary road is statistically non-significant and
negative (beta = -5.14e-06, 95% CI [-2.34e-05, 1.31e-05], p = 0.580; Std. beta
= -0.02, 95% CI [-0.09, 0.05])
  - The effect of distance to tertiary road is statistically significant and
positive (beta = 9.68e-05, 95% CI [5.64e-05, 1.38e-04], p < .001; Std. beta =
0.16, 95% CI [0.09, 0.23])
  - The effect of distance to city is statistically significant and negative
(beta = -1.69e-05, 95% CI [-2.38e-05, -9.92e-06], p < .001; Std. beta = -0.18,
95% CI [-0.26, -0.11])
  - The effect of distance to town is statistically significant and negative
(beta = -1.48e-05, 95% CI [-2.07e-05, -8.91e-06], p < .001; Std. beta = -0.18,
95% CI [-0.26, -0.11])
  - The effect of is urbanTRUE is statistically significant and negative (beta =
-0.30, 95% CI [-0.46, -0.14], p < .001; Std. beta = -0.30, 95% CI [-0.46,
-0.14])
  - The effect of usage capacity [1000] is statistically significant and negative
(beta = -0.62, 95% CI [-0.76, -0.49], p < .001; Std. beta = -0.62, 95% CI
[-0.76, -0.49])
  - The effect of water source clean [Protected Shallow Well] is statistically
significant and positive (beta = 0.50, 95% CI [0.34, 0.67], p < .001; Std. beta
= 0.50, 95% CI [0.34, 0.67])
  - The effect of water source clean [Protected Spring] is statistically
significant and positive (beta = 1.29, 95% CI [0.48, 2.23], p = 0.003; Std.
beta = 1.29, 95% CI [0.48, 2.23])
  - The effect of water point population is statistically significant and
negative (beta = -5.10e-04, 95% CI [-6.01e-04, -4.26e-04], p < .001; Std. beta
= -0.74, 95% CI [-0.88, -0.62])
  - The effect of local population 1km is statistically significant and positive
(beta = 3.45e-04, 95% CI [3.11e-04, 3.81e-04], p < .001; Std. beta = 1.45, 95%
CI [1.30, 1.60])

Standardized parameters were obtained by fitting the model on a standardized
version of the dataset. 95% Confidence Intervals (CIs) and p-values were
computed using a Wald z-distribution approximation., We fitted a logistic model
(estimated using ML) to predict status with distance_to_town (formula: status ~
distance_to_primary_road + distance_to_secondary_road +
distance_to_tertiary_road + distance_to_city + distance_to_town + is_urban +
usage_capacity + water_source_clean + water_point_population +
local_population_1km). The model's explanatory power is moderate (Tjur's R2 =
0.16). The model's intercept, corresponding to distance_to_town = 0, is at 0.39
(95% CI [0.17, 0.61], p < .001). Within this model:

  - The effect of distance to primary road is statistically non-significant and
negative (beta = -4.64e-06, 95% CI [-1.74e-05, 8.07e-06], p = 0.474; Std. beta
= -0.03, 95% CI [-0.10, 0.05])
  - The effect of distance to secondary road is statistically non-significant and
negative (beta = -5.14e-06, 95% CI [-2.34e-05, 1.31e-05], p = 0.580; Std. beta
= -0.02, 95% CI [-0.09, 0.05])
  - The effect of distance to tertiary road is statistically significant and
positive (beta = 9.68e-05, 95% CI [5.64e-05, 1.38e-04], p < .001; Std. beta =
0.16, 95% CI [0.09, 0.23])
  - The effect of distance to city is statistically significant and negative
(beta = -1.69e-05, 95% CI [-2.38e-05, -9.92e-06], p < .001; Std. beta = -0.18,
95% CI [-0.26, -0.11])
  - The effect of distance to town is statistically significant and negative
(beta = -1.48e-05, 95% CI [-2.07e-05, -8.91e-06], p < .001; Std. beta = -0.18,
95% CI [-0.26, -0.11])
  - The effect of is urbanTRUE is statistically significant and negative (beta =
-0.30, 95% CI [-0.46, -0.14], p < .001; Std. beta = -0.30, 95% CI [-0.46,
-0.14])
  - The effect of usage capacity [1000] is statistically significant and negative
(beta = -0.62, 95% CI [-0.76, -0.49], p < .001; Std. beta = -0.62, 95% CI
[-0.76, -0.49])
  - The effect of water source clean [Protected Shallow Well] is statistically
significant and positive (beta = 0.50, 95% CI [0.34, 0.67], p < .001; Std. beta
= 0.50, 95% CI [0.34, 0.67])
  - The effect of water source clean [Protected Spring] is statistically
significant and positive (beta = 1.29, 95% CI [0.48, 2.23], p = 0.003; Std.
beta = 1.29, 95% CI [0.48, 2.23])
  - The effect of water point population is statistically significant and
negative (beta = -5.10e-04, 95% CI [-6.01e-04, -4.26e-04], p < .001; Std. beta
= -0.74, 95% CI [-0.88, -0.62])
  - The effect of local population 1km is statistically significant and positive
(beta = 3.45e-04, 95% CI [3.11e-04, 3.81e-04], p < .001; Std. beta = 1.45, 95%
CI [1.30, 1.60])

Standardized parameters were obtained by fitting the model on a standardized
version of the dataset. 95% Confidence Intervals (CIs) and p-values were
computed using a Wald z-distribution approximation., We fitted a logistic model
(estimated using ML) to predict status with is_urban (formula: status ~
distance_to_primary_road + distance_to_secondary_road +
distance_to_tertiary_road + distance_to_city + distance_to_town + is_urban +
usage_capacity + water_source_clean + water_point_population +
local_population_1km). The model's explanatory power is moderate (Tjur's R2 =
0.16). The model's intercept, corresponding to is_urban = [?], is at 0.39 (95%
CI [0.17, 0.61], p < .001). Within this model:

  - The effect of distance to primary road is statistically non-significant and
negative (beta = -4.64e-06, 95% CI [-1.74e-05, 8.07e-06], p = 0.474; Std. beta
= -0.03, 95% CI [-0.10, 0.05])
  - The effect of distance to secondary road is statistically non-significant and
negative (beta = -5.14e-06, 95% CI [-2.34e-05, 1.31e-05], p = 0.580; Std. beta
= -0.02, 95% CI [-0.09, 0.05])
  - The effect of distance to tertiary road is statistically significant and
positive (beta = 9.68e-05, 95% CI [5.64e-05, 1.38e-04], p < .001; Std. beta =
0.16, 95% CI [0.09, 0.23])
  - The effect of distance to city is statistically significant and negative
(beta = -1.69e-05, 95% CI [-2.38e-05, -9.92e-06], p < .001; Std. beta = -0.18,
95% CI [-0.26, -0.11])
  - The effect of distance to town is statistically significant and negative
(beta = -1.48e-05, 95% CI [-2.07e-05, -8.91e-06], p < .001; Std. beta = -0.18,
95% CI [-0.26, -0.11])
  - The effect of is urbanTRUE is statistically significant and negative (beta =
-0.30, 95% CI [-0.46, -0.14], p < .001; Std. beta = -0.30, 95% CI [-0.46,
-0.14])
  - The effect of usage capacity [1000] is statistically significant and negative
(beta = -0.62, 95% CI [-0.76, -0.49], p < .001; Std. beta = -0.62, 95% CI
[-0.76, -0.49])
  - The effect of water source clean [Protected Shallow Well] is statistically
significant and positive (beta = 0.50, 95% CI [0.34, 0.67], p < .001; Std. beta
= 0.50, 95% CI [0.34, 0.67])
  - The effect of water source clean [Protected Spring] is statistically
significant and positive (beta = 1.29, 95% CI [0.48, 2.23], p = 0.003; Std.
beta = 1.29, 95% CI [0.48, 2.23])
  - The effect of water point population is statistically significant and
negative (beta = -5.10e-04, 95% CI [-6.01e-04, -4.26e-04], p < .001; Std. beta
= -0.74, 95% CI [-0.88, -0.62])
  - The effect of local population 1km is statistically significant and positive
(beta = 3.45e-04, 95% CI [3.11e-04, 3.81e-04], p < .001; Std. beta = 1.45, 95%
CI [1.30, 1.60])

Standardized parameters were obtained by fitting the model on a standardized
version of the dataset. 95% Confidence Intervals (CIs) and p-values were
computed using a Wald z-distribution approximation., We fitted a logistic model
(estimated using ML) to predict status with usage_capacity (formula: status ~
distance_to_primary_road + distance_to_secondary_road +
distance_to_tertiary_road + distance_to_city + distance_to_town + is_urban +
usage_capacity + water_source_clean + water_point_population +
local_population_1km). The model's explanatory power is moderate (Tjur's R2 =
0.16). The model's intercept, corresponding to usage_capacity = 300, is at 0.39
(95% CI [0.17, 0.61], p < .001). Within this model:

  - The effect of distance to primary road is statistically non-significant and
negative (beta = -4.64e-06, 95% CI [-1.74e-05, 8.07e-06], p = 0.474; Std. beta
= -0.03, 95% CI [-0.10, 0.05])
  - The effect of distance to secondary road is statistically non-significant and
negative (beta = -5.14e-06, 95% CI [-2.34e-05, 1.31e-05], p = 0.580; Std. beta
= -0.02, 95% CI [-0.09, 0.05])
  - The effect of distance to tertiary road is statistically significant and
positive (beta = 9.68e-05, 95% CI [5.64e-05, 1.38e-04], p < .001; Std. beta =
0.16, 95% CI [0.09, 0.23])
  - The effect of distance to city is statistically significant and negative
(beta = -1.69e-05, 95% CI [-2.38e-05, -9.92e-06], p < .001; Std. beta = -0.18,
95% CI [-0.26, -0.11])
  - The effect of distance to town is statistically significant and negative
(beta = -1.48e-05, 95% CI [-2.07e-05, -8.91e-06], p < .001; Std. beta = -0.18,
95% CI [-0.26, -0.11])
  - The effect of is urbanTRUE is statistically significant and negative (beta =
-0.30, 95% CI [-0.46, -0.14], p < .001; Std. beta = -0.30, 95% CI [-0.46,
-0.14])
  - The effect of usage capacity [1000] is statistically significant and negative
(beta = -0.62, 95% CI [-0.76, -0.49], p < .001; Std. beta = -0.62, 95% CI
[-0.76, -0.49])
  - The effect of water source clean [Protected Shallow Well] is statistically
significant and positive (beta = 0.50, 95% CI [0.34, 0.67], p < .001; Std. beta
= 0.50, 95% CI [0.34, 0.67])
  - The effect of water source clean [Protected Spring] is statistically
significant and positive (beta = 1.29, 95% CI [0.48, 2.23], p = 0.003; Std.
beta = 1.29, 95% CI [0.48, 2.23])
  - The effect of water point population is statistically significant and
negative (beta = -5.10e-04, 95% CI [-6.01e-04, -4.26e-04], p < .001; Std. beta
= -0.74, 95% CI [-0.88, -0.62])
  - The effect of local population 1km is statistically significant and positive
(beta = 3.45e-04, 95% CI [3.11e-04, 3.81e-04], p < .001; Std. beta = 1.45, 95%
CI [1.30, 1.60])

Standardized parameters were obtained by fitting the model on a standardized
version of the dataset. 95% Confidence Intervals (CIs) and p-values were
computed using a Wald z-distribution approximation., We fitted a logistic model
(estimated using ML) to predict status with water_source_clean (formula: status
~ distance_to_primary_road + distance_to_secondary_road +
distance_to_tertiary_road + distance_to_city + distance_to_town + is_urban +
usage_capacity + water_source_clean + water_point_population +
local_population_1km). The model's explanatory power is moderate (Tjur's R2 =
0.16). The model's intercept, corresponding to water_source_clean = Borehole,
is at 0.39 (95% CI [0.17, 0.61], p < .001). Within this model:

  - The effect of distance to primary road is statistically non-significant and
negative (beta = -4.64e-06, 95% CI [-1.74e-05, 8.07e-06], p = 0.474; Std. beta
= -0.03, 95% CI [-0.10, 0.05])
  - The effect of distance to secondary road is statistically non-significant and
negative (beta = -5.14e-06, 95% CI [-2.34e-05, 1.31e-05], p = 0.580; Std. beta
= -0.02, 95% CI [-0.09, 0.05])
  - The effect of distance to tertiary road is statistically significant and
positive (beta = 9.68e-05, 95% CI [5.64e-05, 1.38e-04], p < .001; Std. beta =
0.16, 95% CI [0.09, 0.23])
  - The effect of distance to city is statistically significant and negative
(beta = -1.69e-05, 95% CI [-2.38e-05, -9.92e-06], p < .001; Std. beta = -0.18,
95% CI [-0.26, -0.11])
  - The effect of distance to town is statistically significant and negative
(beta = -1.48e-05, 95% CI [-2.07e-05, -8.91e-06], p < .001; Std. beta = -0.18,
95% CI [-0.26, -0.11])
  - The effect of is urbanTRUE is statistically significant and negative (beta =
-0.30, 95% CI [-0.46, -0.14], p < .001; Std. beta = -0.30, 95% CI [-0.46,
-0.14])
  - The effect of usage capacity [1000] is statistically significant and negative
(beta = -0.62, 95% CI [-0.76, -0.49], p < .001; Std. beta = -0.62, 95% CI
[-0.76, -0.49])
  - The effect of water source clean [Protected Shallow Well] is statistically
significant and positive (beta = 0.50, 95% CI [0.34, 0.67], p < .001; Std. beta
= 0.50, 95% CI [0.34, 0.67])
  - The effect of water source clean [Protected Spring] is statistically
significant and positive (beta = 1.29, 95% CI [0.48, 2.23], p = 0.003; Std.
beta = 1.29, 95% CI [0.48, 2.23])
  - The effect of water point population is statistically significant and
negative (beta = -5.10e-04, 95% CI [-6.01e-04, -4.26e-04], p < .001; Std. beta
= -0.74, 95% CI [-0.88, -0.62])
  - The effect of local population 1km is statistically significant and positive
(beta = 3.45e-04, 95% CI [3.11e-04, 3.81e-04], p < .001; Std. beta = 1.45, 95%
CI [1.30, 1.60])

Standardized parameters were obtained by fitting the model on a standardized
version of the dataset. 95% Confidence Intervals (CIs) and p-values were
computed using a Wald z-distribution approximation., We fitted a logistic model
(estimated using ML) to predict status with water_point_population (formula:
status ~ distance_to_primary_road + distance_to_secondary_road +
distance_to_tertiary_road + distance_to_city + distance_to_town + is_urban +
usage_capacity + water_source_clean + water_point_population +
local_population_1km). The model's explanatory power is moderate (Tjur's R2 =
0.16). The model's intercept, corresponding to water_point_population = 0, is
at 0.39 (95% CI [0.17, 0.61], p < .001). Within this model:

  - The effect of distance to primary road is statistically non-significant and
negative (beta = -4.64e-06, 95% CI [-1.74e-05, 8.07e-06], p = 0.474; Std. beta
= -0.03, 95% CI [-0.10, 0.05])
  - The effect of distance to secondary road is statistically non-significant and
negative (beta = -5.14e-06, 95% CI [-2.34e-05, 1.31e-05], p = 0.580; Std. beta
= -0.02, 95% CI [-0.09, 0.05])
  - The effect of distance to tertiary road is statistically significant and
positive (beta = 9.68e-05, 95% CI [5.64e-05, 1.38e-04], p < .001; Std. beta =
0.16, 95% CI [0.09, 0.23])
  - The effect of distance to city is statistically significant and negative
(beta = -1.69e-05, 95% CI [-2.38e-05, -9.92e-06], p < .001; Std. beta = -0.18,
95% CI [-0.26, -0.11])
  - The effect of distance to town is statistically significant and negative
(beta = -1.48e-05, 95% CI [-2.07e-05, -8.91e-06], p < .001; Std. beta = -0.18,
95% CI [-0.26, -0.11])
  - The effect of is urbanTRUE is statistically significant and negative (beta =
-0.30, 95% CI [-0.46, -0.14], p < .001; Std. beta = -0.30, 95% CI [-0.46,
-0.14])
  - The effect of usage capacity [1000] is statistically significant and negative
(beta = -0.62, 95% CI [-0.76, -0.49], p < .001; Std. beta = -0.62, 95% CI
[-0.76, -0.49])
  - The effect of water source clean [Protected Shallow Well] is statistically
significant and positive (beta = 0.50, 95% CI [0.34, 0.67], p < .001; Std. beta
= 0.50, 95% CI [0.34, 0.67])
  - The effect of water source clean [Protected Spring] is statistically
significant and positive (beta = 1.29, 95% CI [0.48, 2.23], p = 0.003; Std.
beta = 1.29, 95% CI [0.48, 2.23])
  - The effect of water point population is statistically significant and
negative (beta = -5.10e-04, 95% CI [-6.01e-04, -4.26e-04], p < .001; Std. beta
= -0.74, 95% CI [-0.88, -0.62])
  - The effect of local population 1km is statistically significant and positive
(beta = 3.45e-04, 95% CI [3.11e-04, 3.81e-04], p < .001; Std. beta = 1.45, 95%
CI [1.30, 1.60])

Standardized parameters were obtained by fitting the model on a standardized
version of the dataset. 95% Confidence Intervals (CIs) and p-values were
computed using a Wald z-distribution approximation. and We fitted a logistic
model (estimated using ML) to predict status with local_population_1km
(formula: status ~ distance_to_primary_road + distance_to_secondary_road +
distance_to_tertiary_road + distance_to_city + distance_to_town + is_urban +
usage_capacity + water_source_clean + water_point_population +
local_population_1km). The model's explanatory power is moderate (Tjur's R2 =
0.16). The model's intercept, corresponding to local_population_1km = 0, is at
0.39 (95% CI [0.17, 0.61], p < .001). Within this model:

  - The effect of distance to primary road is statistically non-significant and
negative (beta = -4.64e-06, 95% CI [-1.74e-05, 8.07e-06], p = 0.474; Std. beta
= -0.03, 95% CI [-0.10, 0.05])
  - The effect of distance to secondary road is statistically non-significant and
negative (beta = -5.14e-06, 95% CI [-2.34e-05, 1.31e-05], p = 0.580; Std. beta
= -0.02, 95% CI [-0.09, 0.05])
  - The effect of distance to tertiary road is statistically significant and
positive (beta = 9.68e-05, 95% CI [5.64e-05, 1.38e-04], p < .001; Std. beta =
0.16, 95% CI [0.09, 0.23])
  - The effect of distance to city is statistically significant and negative
(beta = -1.69e-05, 95% CI [-2.38e-05, -9.92e-06], p < .001; Std. beta = -0.18,
95% CI [-0.26, -0.11])
  - The effect of distance to town is statistically significant and negative
(beta = -1.48e-05, 95% CI [-2.07e-05, -8.91e-06], p < .001; Std. beta = -0.18,
95% CI [-0.26, -0.11])
  - The effect of is urbanTRUE is statistically significant and negative (beta =
-0.30, 95% CI [-0.46, -0.14], p < .001; Std. beta = -0.30, 95% CI [-0.46,
-0.14])
  - The effect of usage capacity [1000] is statistically significant and negative
(beta = -0.62, 95% CI [-0.76, -0.49], p < .001; Std. beta = -0.62, 95% CI
[-0.76, -0.49])
  - The effect of water source clean [Protected Shallow Well] is statistically
significant and positive (beta = 0.50, 95% CI [0.34, 0.67], p < .001; Std. beta
= 0.50, 95% CI [0.34, 0.67])
  - The effect of water source clean [Protected Spring] is statistically
significant and positive (beta = 1.29, 95% CI [0.48, 2.23], p = 0.003; Std.
beta = 1.29, 95% CI [0.48, 2.23])
  - The effect of water point population is statistically significant and
negative (beta = -5.10e-04, 95% CI [-6.01e-04, -4.26e-04], p < .001; Std. beta
= -0.74, 95% CI [-0.88, -0.62])
  - The effect of local population 1km is statistically significant and positive
(beta = 3.45e-04, 95% CI [3.11e-04, 3.81e-04], p < .001; Std. beta = 1.45, 95%
CI [1.30, 1.60])

Standardized parameters were obtained by fitting the model on a standardized
version of the dataset. 95% Confidence Intervals (CIs) and p-values were
computed using a Wald z-distribution approximation.

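Variables that are not statistically significant (p-value > 0.05) should be excluded; in the report above these are distance_to_primary_road (p = 0.474) and distance_to_secondary_road (p = 0.580).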

# Confusion matrix and classification statistics for the fitted logistic
# model `model`, classifying predicted probabilities with a 0.5 cutoff.
blr_confusion_matrix(model, cutoff = 0.5)
Confusion Matrix and Statistics 

          Reference
Prediction FALSE TRUE
         0  1301  738
         1   813 1904

                Accuracy : 0.6739 
     No Information Rate : 0.4445 

                   Kappa : 0.3373 

McNemars's Test P-Value  : 0.0602 

             Sensitivity : 0.7207 
             Specificity : 0.6154 
          Pos Pred Value : 0.7008 
          Neg Pred Value : 0.6381 
              Prevalence : 0.5555 
          Detection Rate : 0.4003 
    Detection Prevalence : 0.5713 
       Balanced Accuracy : 0.6680 
               Precision : 0.7008 
                  Recall : 0.7207 

        'Positive' Class : 1

The validity of a cut-off is assessed using sensitivity, specificity and accuracy.

0.5 is the cut-off probability that separates functional from non-functional water points. We have the flexibility to set a different value.

The model identifies true positives (sensitivity 0.72) better than true negatives (specificity 0.62).

# Keep the response (status) and the ten explanatory variables, then convert
# the sf object to an sp SpatialPointsDataFrame, which is the object passed
# as `data` to the GWmodel calls further down.
Osun_wp_sp <- as_Spatial(
  select(
    Osun_wp_sf_clean,
    status,
    distance_to_primary_road,
    distance_to_secondary_road,
    distance_to_tertiary_road,
    distance_to_city,
    distance_to_town,
    water_point_population,
    local_population_1km,
    usage_capacity,
    is_urban,
    water_source_clean
  )
)
# Print a summary of the converted object.
Osun_wp_sp
class       : SpatialPointsDataFrame 
features    : 4756 
extent      : 182502.4, 290751, 340054.1, 450905.3  (xmin, xmax, ymin, ymax)
crs         : +proj=tmerc +lat_0=4 +lon_0=8.5 +k=0.99975 +x_0=670553.98 +y_0=0 +a=6378249.145 +rf=293.465 +towgs84=-92,-93,122,0,0,0,0 +units=m +no_defs 
variables   : 11
names       : status, distance_to_primary_road, distance_to_secondary_road, distance_to_tertiary_road, distance_to_city, distance_to_town, water_point_population, local_population_1km, usage_capacity, is_urban, water_source_clean 
min values  :      0,        0.014461356813335,          0.152195902540837,         0.017815121653488, 53.0461399623541, 30.0019777713073,                      0,                    0,           1000,        0,           Borehole 
max values  :      1,         26909.8616132094,           19559.4793799085,          10966.2705628969,  47934.343603562, 44020.6393368124,                  29697,                36118,            300,        1,   Protected Spring 
# Print the SpatialPointsDataFrame summary again (repeats the call above).
Osun_wp_sp
class       : SpatialPointsDataFrame 
features    : 4756 
extent      : 182502.4, 290751, 340054.1, 450905.3  (xmin, xmax, ymin, ymax)
crs         : +proj=tmerc +lat_0=4 +lon_0=8.5 +k=0.99975 +x_0=670553.98 +y_0=0 +a=6378249.145 +rf=293.465 +towgs84=-92,-93,122,0,0,0,0 +units=m +no_defs 
variables   : 11
names       : status, distance_to_primary_road, distance_to_secondary_road, distance_to_tertiary_road, distance_to_city, distance_to_town, water_point_population, local_population_1km, usage_capacity, is_urban, water_source_clean 
min values  :      0,        0.014461356813335,          0.152195902540837,         0.017815121653488, 53.0461399623541, 30.0019777713073,                      0,                    0,           1000,        0,           Borehole 
max values  :      1,         26909.8616132094,           19559.4793799085,          10966.2705628969,  47934.343603562, 44020.6393368124,                  29697,                36118,            300,        1,   Protected Spring 
# Search for the fixed (distance-based) bandwidth of a geographically
# weighted binomial model with a Gaussian kernel, using the AIC approach.
# Distances are computed on the projected coordinates (longlat = FALSE).
bw.fixed <- bw.ggwr(
  formula = status ~
    distance_to_primary_road +
    distance_to_secondary_road +
    distance_to_tertiary_road +
    distance_to_city +
    distance_to_town +
    water_point_population +
    local_population_1km +
    is_urban +
    usage_capacity +
    water_source_clean,
  data = Osun_wp_sp,
  family = "binomial",
  approach = "AIC",
  kernel = "gaussian",
  adaptive = FALSE,
  longlat = FALSE
)
Take a cup of tea and have a break, it will take a few minutes.
          -----A kind suggestion from GWmodel development group
 Iteration    Log-Likelihood:(With bandwidth:  95768.67 )
=========================
       0        -2889 
       1        -2836 
       2        -2830 
       3        -2829 
       4        -2829 
       5        -2829 
Fixed bandwidth: 95768.67 AICc value: 5684.357 
 Iteration    Log-Likelihood:(With bandwidth:  59200.13 )
=========================
       0        -2875 
       1        -2818 
       2        -2810 
       3        -2808 
       4        -2808 
       5        -2808 
Fixed bandwidth: 59200.13 AICc value: 5646.785 
 Iteration    Log-Likelihood:(With bandwidth:  36599.53 )
=========================
       0        -2847 
       1        -2781 
       2        -2768 
       3        -2765 
       4        -2765 
       5        -2765 
       6        -2765 
Fixed bandwidth: 36599.53 AICc value: 5575.148 
 Iteration    Log-Likelihood:(With bandwidth:  22631.59 )
=========================
       0        -2798 
       1        -2719 
       2        -2698 
       3        -2693 
       4        -2693 
       5        -2693 
       6        -2693 
Fixed bandwidth: 22631.59 AICc value: 5466.883 
 Iteration    Log-Likelihood:(With bandwidth:  13998.93 )
=========================
       0        -2720 
       1        -2622 
       2        -2590 
       3        -2581 
       4        -2580 
       5        -2580 
       6        -2580 
       7        -2580 
Fixed bandwidth: 13998.93 AICc value: 5324.578 
 Iteration    Log-Likelihood:(With bandwidth:  8663.649 )
=========================
       0        -2601 
       1        -2476 
       2        -2431 
       3        -2419 
       4        -2417 
       5        -2417 
       6        -2417 
       7        -2417 
Fixed bandwidth: 8663.649 AICc value: 5163.61 
 Iteration    Log-Likelihood:(With bandwidth:  5366.266 )
=========================
       0        -2436 
       1        -2268 
       2        -2194 
       3        -2167 
       4        -2161 
       5        -2161 
       6        -2161 
       7        -2161 
       8        -2161 
       9        -2161 
Fixed bandwidth: 5366.266 AICc value: 4990.587 
 Iteration    Log-Likelihood:(With bandwidth:  3328.371 )
=========================
       0        -2157 
       1        -1922 
       2        -1802 
       3        -1739 
       4        -1713 
       5        -1713 
Fixed bandwidth: 3328.371 AICc value: 4798.288 
 Iteration    Log-Likelihood:(With bandwidth:  2068.882 )
=========================
       0        -1751 
       1        -1421 
       2        -1238 
       3        -1133 
       4        -1084 
       5        -1084 
Fixed bandwidth: 2068.882 AICc value: 4837.017 
 Iteration    Log-Likelihood:(With bandwidth:  4106.777 )
=========================
       0        -2297 
       1        -2095 
       2        -1997 
       3        -1951 
       4        -1938 
       5        -1936 
       6        -1936 
       7        -1936 
       8        -1936 
Fixed bandwidth: 4106.777 AICc value: 4873.161 
 Iteration    Log-Likelihood:(With bandwidth:  2847.289 )
=========================
       0        -2036 
       1        -1771 
       2        -1633 
       3        -1558 
       4        -1525 
       5        -1525 
Fixed bandwidth: 2847.289 AICc value: 4768.192 
 Iteration    Log-Likelihood:(With bandwidth:  2549.964 )
=========================
       0        -1941 
       1        -1655 
       2        -1503 
       3        -1417 
       4        -1378 
       5        -1378 
Fixed bandwidth: 2549.964 AICc value: 4762.212 
 Iteration    Log-Likelihood:(With bandwidth:  2366.207 )
=========================
       0        -1874 
       1        -1573 
       2        -1410 
       3        -1316 
       4        -1274 
       5        -1274 
Fixed bandwidth: 2366.207 AICc value: 4773.081 
 Iteration    Log-Likelihood:(With bandwidth:  2663.532 )
=========================
       0        -1979 
       1        -1702 
       2        -1555 
       3        -1474 
       4        -1438 
       5        -1438 
Fixed bandwidth: 2663.532 AICc value: 4762.568 
 Iteration    Log-Likelihood:(With bandwidth:  2479.775 )
=========================
       0        -1917 
       1        -1625 
       2        -1468 
       3        -1380 
       4        -1339 
       5        -1339 
Fixed bandwidth: 2479.775 AICc value: 4764.294 
 Iteration    Log-Likelihood:(With bandwidth:  2593.343 )
=========================
       0        -1956 
       1        -1674 
       2        -1523 
       3        -1439 
       4        -1401 
       5        -1401 
Fixed bandwidth: 2593.343 AICc value: 4761.813 
 Iteration    Log-Likelihood:(With bandwidth:  2620.153 )
=========================
       0        -1965 
       1        -1685 
       2        -1536 
       3        -1453 
       4        -1415 
       5        -1415 
Fixed bandwidth: 2620.153 AICc value: 4761.89 
 Iteration    Log-Likelihood:(With bandwidth:  2576.774 )
=========================
       0        -1950 
       1        -1667 
       2        -1515 
       3        -1431 
       4        -1393 
       5        -1393 
Fixed bandwidth: 2576.774 AICc value: 4761.889 
 Iteration    Log-Likelihood:(With bandwidth:  2603.584 )
=========================
       0        -1960 
       1        -1678 
       2        -1528 
       3        -1445 
       4        -1407 
       5        -1407 
Fixed bandwidth: 2603.584 AICc value: 4761.813 
 Iteration    Log-Likelihood:(With bandwidth:  2609.913 )
=========================
       0        -1962 
       1        -1680 
       2        -1531 
       3        -1448 
       4        -1410 
       5        -1410 
Fixed bandwidth: 2609.913 AICc value: 4761.831 
 Iteration    Log-Likelihood:(With bandwidth:  2599.672 )
=========================
       0        -1958 
       1        -1676 
       2        -1526 
       3        -1443 
       4        -1405 
       5        -1405 
Fixed bandwidth: 2599.672 AICc value: 4761.809 
 Iteration    Log-Likelihood:(With bandwidth:  2597.255 )
=========================
       0        -1957 
       1        -1675 
       2        -1525 
       3        -1441 
       4        -1403 
       5        -1403 
Fixed bandwidth: 2597.255 AICc value: 4761.809 
# Show the bandwidth returned by the search above.
bw.fixed
[1] 2599.672
# Fit the geographically weighted logistic regression (binomial family,
# fixed Gaussian kernel) using the bandwidth selected by bw.ggwr().
#
# NOTE(review): the bandwidth was previously hard-coded as 2597.255, which is
# the last candidate tried in the search log rather than the value bw.ggwr()
# returned (2599.672). Passing `bw.fixed` keeps the fit tied to the selected
# bandwidth; re-render so the printed results reflect it.
gwlr.fixed <- ggwr.basic(status ~
                           distance_to_primary_road +
                           distance_to_secondary_road +
                           distance_to_tertiary_road +
                           distance_to_city +
                           distance_to_town +
                           water_point_population +
                           local_population_1km +
                           usage_capacity +
                           is_urban +
                           water_source_clean,
                         data = Osun_wp_sp,
                         bw = bw.fixed,
                         family = "binomial",
                         kernel = "gaussian",
                         adaptive = FALSE,
                         longlat = FALSE)
 Iteration    Log-Likelihood
=========================
       0        -1957 
       1        -1675 
       2        -1525 
       3        -1441 
       4        -1403 
       5        -1403 
# Pull the SDF component out of the fitted model and flatten it into a plain
# data frame; its `y` and `yhat` columns are used in the steps that follow.
gwr.fixed <- gwlr.fixed$SDF %>%
  as.data.frame()

Next, we label yhat values greater than or equal to 0.5 as TRUE and all other values as FALSE. The result of this logical comparison is saved into a field called most.

# Classify each water point from its fitted probability: `most` is TRUE when
# yhat >= 0.5 and FALSE otherwise. The comparison already yields a logical
# vector, so no ifelse() (or the reassignable T / F shorthands) is needed.
# Both the prediction and the observed response are then turned into
# factors, as caret::confusionMatrix() expects.
gwr.fixed <- gwr.fixed %>%
  mutate(
    most = as.factor(yhat >= 0.5),
    y = as.factor(y)
  )

# Cross-tabulate predicted vs. observed status. No `positive` level is given,
# so caret reports the first factor level ("FALSE") as the positive class.
CM <- confusionMatrix(data = gwr.fixed$most, reference = gwr.fixed$y)
CM
Confusion Matrix and Statistics

          Reference
Prediction FALSE TRUE
     FALSE  1824  263
     TRUE    290 2379
                                          
               Accuracy : 0.8837          
                 95% CI : (0.8743, 0.8927)
    No Information Rate : 0.5555          
    P-Value [Acc > NIR] : <2e-16          
                                          
                  Kappa : 0.7642          
                                          
 Mcnemar's Test P-Value : 0.2689          
                                          
            Sensitivity : 0.8628          
            Specificity : 0.9005          
         Pos Pred Value : 0.8740          
         Neg Pred Value : 0.8913          
             Prevalence : 0.4445          
         Detection Rate : 0.3835          
   Detection Prevalence : 0.4388          
      Balanced Accuracy : 0.8816          
                                          
       'Positive' Class : FALSE           
                                          

The true negative rate increases to about 0.9. We have applied a localized (geographically weighted) strategy for better analysis.

# Keep the administrative name/code columns and the observed status from the
# cleaned sf layer.
Osun_wp_sf_selected <- select(
  Osun_wp_sf_clean,
  ADM2_EN, ADM2_PCODE,
  ADM1_EN, ADM1_PCODE,
  status
)

# Attach the model output column-wise so the results can be mapped.
gwr_sf.fixed <- cbind(Osun_wp_sf_selected, gwr.fixed)

# Switch tmap to interactive viewing.
tmap_mode("view")
tmap mode set to interactive viewing
# Map of the fitted values: the Osun polygons are drawn faintly as a
# backdrop, with each water point coloured by its `yhat` value.
prob_T <-
  tm_shape(Osun) +
  tm_polygons(alpha = 0.1) +
  tm_shape(gwr_sf.fixed) +
  tm_dots(
    col = "yhat",
    border.col = "gray60",
    border.lwd = 1
  ) +
  tm_view(set.zoom.limits = c(8, 14))
prob_T